Packages

pacman::p_load(
  dplyr,
  tidyverse,
  readxl,
  survival,
  survminer,
  survMisc,# the one used
  ggfortify,
  ggthemes,
  ggtext,
  RColorBrewer,
  cowplot,
  PoweR,
  stats,
  ghibli)

# setwd("D:/PhD/01_Data/02_Probiotics/Model_2") # Windows
# setwd("/Volumes/Yiming_Wang/PhD/01_Data/02_Probiotics/Model_2") # Mac
# setwd("/Users/godric/Desktop/Fut2 review comments/Revision") #Macbook pro
setwd("/Users/godric/Documents/GitHub/2024_ISME_Fut2_Bifidobacterium/01_Data")

df_all_model2 <- read_excel("Model_2_probiotic.xlsx")

ghibli_palettes$MononokeLight
## <colors>
## #838A90FF #BA968AFF #9FA7BEFF #B3B8B1FF #E7A79BFF #F2C695FF #F5EDC9FF

Abx & Probiotics treatment

Shape data

df_shape_detection_abx <- df_all_model2 %>% 
  filter(Model == "2") %>% 
  filter(Group == "ABX_PRO") %>% 
  select(Timepoint, Group, Treatment, GenotypeSex, Mice_ID, Detection) %>% 
  pivot_wider(names_from = Timepoint,
              values_from = Detection) %>% 
  mutate(Disappear_time = ifelse(D14_post_gavage == "Positive", 14,
                              ifelse(D10_post_gavage == "Positive", 14,
                                     ifelse (D7_post_gavage == "Positive", 10,
                                             ifelse(D5_post_gavage == "Positive",7,
                                                    ifelse (D3_post_gavage == "Positive", 5,
                                                            ifelse(D1_post_gavage == "Positive", 3, 1))))))) %>% 
  mutate(status = ifelse(D14_post_gavage == "Positive", 0,1)) %>% 
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female")))

Genotype x sex in Male

Infantis

set.seed(123)
# All four together

## Shape data
df_infantis_abxpro<- df_shape_detection_abx %>% 
  filter(Treatment == "Infantis") 

df_infantis_abxpro_Male <- df_shape_detection_abx %>% 
  filter(Treatment == "Infantis" & Sex == "Male")


## log-rank test
### We can conduct between-group significance tests using a log-rank test. The log-rank test equally weights observations over the entire follow-up time and is the most common way to compare survival times between groups. 
survdiff(Surv(Disappear_time, status) ~ Genotype + Sex, data = df_infantis_abxpro_Male)
## Call:
## survdiff(formula = Surv(Disappear_time, status) ~ Genotype + 
##     Sex, data = df_infantis_abxpro_Male)
## 
##                         N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=WT, Sex=Male   8        7    10.77      1.32      11.4
## Genotype=KO, Sex=Male   8        8     4.23      3.35      11.4
## 
##  Chisq= 11.4  on 1 degrees of freedom, p= 7e-04
# Male (WT vs KO)

fit_genotypesex_infantis_Male <- survfit(Surv(Disappear_time, status) ~ GenotypeSex, data = df_infantis_abxpro_Male)
figure_genotypesex_infantis_Male <- ggsurvplot(fit_genotypesex_infantis_Male, data = df_infantis_abxpro_Male,
           size = 1,
           title = " ",
           font.title = 10,
           xlim=c(1,14),
           xlab = "Days (D1 - D14 post-gavage)",
           break.time.by = 2,
           surv.scale = "percent",
           ylim=c(0,1.15),
           ylab = "B. infantis persistence rate",
           break.y.by = 0.2,
           font.x=16,
           font.y=16,
           pval = FALSE, pval.method = FALSE,
           pval.coord = c(1,0.05),
           pval.method.coord = c(1,0.15),
           pval.size = 5,
           pval.method.size=5,
           conf.int = FALSE,
           conf.int.style = c("ribbon", "step"),
           conf.int.alpha = 0.2,
           surv.plot.height = 1,
           legend = c("bottom"),
           legend.title = "Genotype x Sex",
           legend.labs = c("WT Male","KO Male"),
           palette = c("#3C5488B2","#7E6148B2"),
           ggtheme = theme(axis.text.x=element_text(colour="black", face="plain", size=16), 
                           axis.text.y=element_text(colour="black", face="plain", size=16),
                           axis.title.x=element_text(margin = margin(t = 5), size=16),
                           axis.title.y=element_text(margin = margin(r = 5), size=16),
                           axis.title = element_text(face="plain", size = 10),
                           plot.title = element_text(size=10, hjust=0.5),
                           legend.title = element_text(size = 14),
                           legend.text = element_text(size = 14),
                           panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
                           panel.background = element_rect(fill = "white"),
                           legend.background = element_rect(color="transparent"),
                           aspect.ratio = 1,
                           plot.background = element_rect(fill="transparent", colour = "transparent"))
           )

figure_genotypesex_infantis_Male$plot <- figure_genotypesex_infantis_Male$plot + 
                                        ggplot2::annotate("text", 
                                        x = 8, y = 1.15, # x and y coordinates of the text
                                        label=expression(paste("Log-rank; ",italic("P") == "0.00074")),hjust=0.5, size=5.5)

figure_genotypesex_infantis_Male

Bifidum

set.seed(123)
# All four together

## Shape data
df_bifidum_abxpro<- df_shape_detection_abx %>% 
  filter(Treatment == "Bifidum")

df_bifidum_abxpro_Male<- df_shape_detection_abx %>% 
  filter(Treatment == "Bifidum" & Sex == "Male")

## log-rank test
### We can conduct between-group significance tests using a log-rank test. The log-rank test equally weights observations over the entire follow-up time and is the most common way to compare survival times between groups. 
survdiff(Surv(Disappear_time, status) ~ Genotype + Sex, data = df_bifidum_abxpro_Male)
## Call:
## survdiff(formula = Surv(Disappear_time, status) ~ Genotype + 
##     Sex, data = df_bifidum_abxpro_Male)
## 
##                         N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=WT, Sex=Male   8        8     6.68     0.261      1.19
## Genotype=KO, Sex=Male   8        8     9.32     0.187      1.19
## 
##  Chisq= 1.2  on 1 degrees of freedom, p= 0.3
# Male (WT vs KO) p = 0.28

fit_genotypesex_bifidum_Male <- survfit(Surv(Disappear_time, status) ~ GenotypeSex, data = df_bifidum_abxpro_Male)
figure_genotypesex_bifidum_Male <- ggsurvplot(fit_genotypesex_bifidum_Male, data = df_bifidum_abxpro_Male,
           size = 1,
           title = " ",
           font.title = 10,
           xlim=c(1,14),
           xlab = "Days (D1 - D14 post-gavage)",
           break.time.by = 2,
           surv.scale = "percent",
           ylim=c(0,1.15),
           ylab = "B. bifidum persistence rate",
           break.y.by = 0.2,
           font.x=14,
           font.y=14,
           pval = FALSE, pval.method = FALSE,
           pval.coord = c(1,0.05),
           pval.method.coord = c(1,0.1),
           pval.size = 5,
           pval.method.size=5,
           conf.int = FALSE,
           conf.int.style = c("ribbon", "step"),
           conf.int.alpha = 0.2,
           surv.plot.height = 1,
           legend = c("bottom"),
           legend.title = "Genotype x Sex",
           legend.labs = c("WT Male","KO Male"),
           palette = c("#3C5488B2","#7E6148B2"),
           ggtheme = theme(axis.text.x=element_text(colour="black", face="plain", size=16), 
                           axis.text.y=element_text(colour="black", face="plain", size=16),
                           axis.title.x=element_text(margin = margin(t = 5), size=16),
                           axis.title.y=element_text(margin = margin(r = 5), size=16),
                           axis.title = element_text(face="plain", size = 10),
                           plot.title = element_text(size=10, hjust=0.5),
                           legend.title = element_text(size = 14),
                           legend.text = element_text(size = 14),
                           panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
                           panel.background = element_rect(fill = "white"),
                           legend.background = element_rect(color="transparent"),
                           aspect.ratio = 1,
                           plot.background = element_rect(fill="transparent", colour = "transparent"))
           )

figure_genotypesex_bifidum_Male$plot <- figure_genotypesex_bifidum_Male$plot + 
                                        ggplot2::annotate("text", 
                                        x = 8, y = 1.15, # x and y coordinates of the text
                                        label=expression(paste("Log-rank; ",italic("P") > "0.05")),hjust=0.5, size=5.5)
  

figure_genotypesex_bifidum_Male

Breve

set.seed(123)
# All four together

## Shape data
df_breve_abxpro<- df_shape_detection_abx %>% 
  filter(Treatment == "Breve")
df_breve_abxpro_Male<- df_shape_detection_abx %>% 
  filter(Treatment == "Breve" & Sex == "Male")

## log-rank test
### We can conduct between-group significance tests using a log-rank test. The log-rank test equally weights observations over the entire follow-up time and is the most common way to compare survival times between groups. 
survdiff(Surv(Disappear_time, status) ~ Genotype + Sex, data = df_breve_abxpro_Male)
## Call:
## survdiff(formula = Surv(Disappear_time, status) ~ Genotype + 
##     Sex, data = df_breve_abxpro_Male)
## 
##                         N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=WT, Sex=Male   8        6      6.5    0.0385     0.156
## Genotype=KO, Sex=Male   8        7      6.5    0.0385     0.156
## 
##  Chisq= 0.2  on 1 degrees of freedom, p= 0.7
# Male (WT vs KO)

fit_genotypesex_breve_Male <- survfit(Surv(Disappear_time, status) ~ GenotypeSex, data = df_breve_abxpro_Male)
figure_genotypesex_breve_Male <- ggsurvplot(fit_genotypesex_breve_Male, data = df_breve_abxpro_Male,
           size = 1,
           title = " ",
           font.title = 10,
           xlim=c(1,14),
           xlab = "Days (D1 - D14 post-gavage)",
           break.time.by = 2,
           surv.scale = "percent",
           ylim=c(0,1.15),
           ylab = "B. breve persistence rate",
           break.y.by = 0.2,
           font.x=14,
           font.y=14,
           pval = FALSE, pval.method = FALSE,
           pval.coord = c(1,0.05),
           pval.method.coord = c(1,0.1),
           pval.size = 5,
           pval.method.size=5,
           conf.int = FALSE,
           conf.int.style = c("ribbon", "step"),
           conf.int.alpha = 0.2,
           surv.plot.height = 1,
           legend = c("bottom"),
           legend.title = "Genotype x Sex",
           legend.labs = c("WT Male","KO Male"),
           palette = c("#3C5488B2","#7E6148B2"),
           ggtheme = theme(axis.text.x=element_text(colour="black", face="plain", size=16), 
                           axis.text.y=element_text(colour="black", face="plain", size=16),
                           axis.title.x=element_text(margin = margin(t = 5), size=16),
                           axis.title.y=element_text(margin = margin(r = 5), size=16),
                           axis.title = element_text(face="plain", size = 10),
                           plot.title = element_text(size=10, hjust=0.5),
                           legend.title = element_text(size = 14),
                           legend.text = element_text(size = 14),
                           panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
                           panel.background = element_rect(fill = "white"),
                           legend.background = element_rect(color="transparent"),
                           aspect.ratio = 1,
                           plot.background = element_rect(fill="transparent", colour = "transparent"))
           )

figure_genotypesex_breve_Male$plot <- figure_genotypesex_breve_Male$plot + 
                                        ggplot2::annotate("text", 
                                        x = 8, y = 1.15, # x and y coordinates of the text
                                        label=expression(paste("Log-rank; ",italic("P") > "0.05")),hjust=0.5, size=5.5)
  
figure_genotypesex_breve_Male

Probiotics treatment

Shape data

df_shape_detection_pro <- df_all_model2 %>% 
  filter(Model=="2") %>% 
  filter(Group == "PRO") %>% 
  select(Timepoint, Group, Treatment, GenotypeSex, Mice_ID, Detection) %>% 
  pivot_wider(names_from = Timepoint,
              values_from = Detection) %>% 
  mutate(Disappear_time = ifelse(D7_post_gavage == "Positive", 7,
                              ifelse(D5_post_gavage == "Positive", 7,
                                     ifelse (D3_post_gavage == "Positive", 5,
                                             ifelse(D2_post_gavage == "Positive",3,
                                                    ifelse(D1_post_gavage == "Positive",2,1)))))) %>% 
  mutate(status = ifelse(D7_post_gavage == "Positive", 0,1)) %>% 
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female")))

Genotype x sex

Infantis

set.seed(123)
# Days post-gavage
## Shape data
df_infantis_pro<- df_shape_detection_pro %>% 
  filter(Treatment == "Infantis")

## Cox regression
mv_fit <- coxph(Surv(Disappear_time, status) ~ Genotype + Sex, data = df_infantis_pro)
mv_fit
## Call:
## coxph(formula = Surv(Disappear_time, status) ~ Genotype + Sex, 
##     data = df_infantis_pro)
## 
##                coef exp(coef) se(coef)      z     p
## GenotypeKO  0.08404   1.08767  0.40637  0.207 0.836
## SexFemale  -0.68961   0.50177  0.41049 -1.680 0.093
## 
## Likelihood ratio test=2.92  on 2 df, p=0.2324
## n= 26, number of events= 26 
##    (6 observations deleted due to missingness)
cz <- cox.zph(mv_fit)
print(cz)
##          chisq df    p
## Genotype  1.08  1 0.30
## Sex       2.25  1 0.13
## GLOBAL    2.46  2 0.29
## log-rank test
### We can conduct between-group significance tests using a log-rank test. The log-rank test equally weights observations over the entire follow-up time and is the most common way to compare survival times between groups. 
survdiff(Surv(Disappear_time, status) ~ Genotype + Sex, data = df_infantis_pro)
## Call:
## survdiff(formula = Surv(Disappear_time, status) ~ Genotype + 
##     Sex, data = df_infantis_pro)
## 
## n=26, 6 observations deleted due to missingness.
## 
##                         N Observed Expected (O-E)^2/E (O-E)^2/V
## Genotype=WT, Sex=Male   8        8     5.77     0.864     3.040
## Genotype=WT, Sex=Female 4        4     5.61     0.461     1.997
## Genotype=KO, Sex=Male   8        8     6.71     0.246     0.876
## Genotype=KO, Sex=Female 6        6     7.91     0.462     2.119
## 
##  Chisq= 6.2  on 3 degrees of freedom, p= 0.1
# Male (WT vs KO)
df_infantis_pro_Male<- df_shape_detection_pro %>% 
  filter(Treatment == "Infantis" & Sex == "Male")
fit_genotypesex_infantis_Male <- survfit(Surv(Disappear_time, status) ~ GenotypeSex, data = df_infantis_pro_Male)
figure_genotypesex_pro_infantis_Male <- ggsurvplot(fit_genotypesex_infantis_Male, data = df_infantis_pro_Male,
           size = 1,
           title = "",
           font.title = 14,
           xlim=c(1,7),
           xlab = "Days (D1 - D7 post-gavage)",
           break.time.by = 1,
           surv.scale = "percent",
           ylim=c(0,1.1),
           ylab = "B. infantis persistence rate",
           break.y.by = 0.2,
           font.x=14,
           font.y=14,
           pval = FALSE, pval.method = FALSE,
           pval.coord = c(1,0.05),
           pval.method.coord = c(1,0.1),
           pval.size = 5,
           pval.method.size=5,
           conf.int = FALSE,
           conf.int.style = c("ribbon", "step"),
           conf.int.alpha = 0.2,
           surv.plot.height = 1,
           legend = c("bottom"),
           legend.title = "Genotype x Sex",
           legend.labs = c("WT Male","KO Male"),
           palette = c("#3C5488B2","#7E6148B2"),
           ggtheme = theme(axis.text.x=element_text(colour="black", face="plain", size=14), 
                           axis.text.y=element_text(colour="black", face="plain", size=14),
                           axis.title.x=element_text(margin = margin(t = 5), size=16),
                           axis.title.y=element_text(margin = margin(r = 5), size=16),
                           axis.title = element_text(face="plain", size = 10),
                           plot.title = element_text(size=10, hjust=0.5),
                           legend.title = element_text(size = 14),
                           legend.text = element_text(size = 14),
                           panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
                           panel.background = element_rect(fill = "white"),
                           legend.background = element_rect(color="transparent"),
                           aspect.ratio = 1,
                           plot.background = element_rect(fill="transparent", colour = "transparent"))
           )

figure_genotypesex_pro_infantis_Male$plot <- figure_genotypesex_pro_infantis_Male$plot + 
                                        ggplot2::annotate("text", 
                                        x = 4, y = 1.1, # x and y coordinates of the text
                                        label=expression(paste("Log-rank; ",italic("P") > "0.05")),hjust=0.5, size=5)
  
figure_genotypesex_pro_infantis_Male

# Days During gavage + post-gavage
## Shape data

df_shape_detection_pro <- df_all_model2 %>% 
  filter(Model=="2") %>% 
  filter(Group == "PRO") %>% 
  select(Timepoint, Group, Treatment, GenotypeSex, Mice_ID, Detection) %>% 
  pivot_wider(names_from = Timepoint,
              values_from = Detection) %>% 
  filter(Treatment == "Infantis") %>% 
  mutate(Disappear_time = ifelse(D7_post_gavage == "Positive", 12,
                              ifelse(D5_post_gavage == "Positive", 12,
                                     ifelse (D3_post_gavage == "Positive", 10,
                                             ifelse (D2_post_gavage == "Positive", 8,
                                                     ifelse(D1_post_gavage == "Positive",7, 
                                                            ifelse(D5_during_gavage == "Positive",6,
                                                                   ifelse(D4_during_gavage == "Positive",5,
                                                                          ifelse(D2_during_gavage == "Positive",4,2)))))))))%>% 
  mutate(status = ifelse(D7_post_gavage == "Positive", 0,1)) %>% 
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  filter(Sex == "Male")

# Male (WT vs KO)
fit_genotypesex_infantis_Male <- survfit(Surv(Disappear_time, status) ~ GenotypeSex, data = df_shape_detection_pro)
figure_genotypesex_pro_infantis_Male <- ggsurvplot(fit_genotypesex_infantis_Male, data = df_shape_detection_pro,
           size = 1,
           title = "",
           font.title = 14,
           xlim=c(1,12),
           xlab = "Days",
           break.time.by = 1,
           surv.scale = "percent",
           ylim=c(0,1.15),
           ylab = "B. infantis persistence rate",
           break.y.by = 0.2,
           font.x=14,
           font.y=14,
           pval = FALSE, pval.method = FALSE,
           pval.coord = c(1,0.05),
           pval.method.coord = c(1,0.1),
           pval.size = 5,
           pval.method.size=5,
           conf.int = FALSE,
           conf.int.style = c("ribbon", "step"),
           conf.int.alpha = 0.2,
           surv.plot.height = 1,
           legend = c("bottom"),
           legend.title = "Genotype x Sex",
           legend.labs = c("WT Male","KO Male"),
           palette = c("#3C5488B2","#7E6148B2"),
           ggtheme = theme(axis.text.x=element_text(colour="black", face="plain", size=14), 
                           axis.text.y=element_text(colour="black", face="plain", size=14),
                           axis.title.x=element_text(margin = margin(t = 5), size=14),
                           axis.title.y=element_text(margin = margin(r = 5), size=14),
                           axis.title = element_text(face="plain", size = 10),
                           plot.title = element_text(size=10, hjust=0.5),
                           legend.title = element_text(size = 14),
                           legend.text = element_text(size = 14),
                           panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
                           panel.background = element_rect(fill = "white"),
                           legend.background = element_rect(color="transparent"),
                           aspect.ratio = 0.8,
                           plot.background = element_rect(fill="transparent", colour = "transparent"))
           )

figure_genotypesex_pro_infantis_Male$plot <- figure_genotypesex_pro_infantis_Male$plot + 
                                        ggplot2::annotate("text", 
                                        x = 9.5, y = 1.15, # x and y coordinates of the text
                                        label=expression(paste("Log-rank; ",italic("P") > "0.05")),hjust=0.5, size=5)+
  geom_vline(xintercept = 6, linetype ="dashed", color ="black", alpha = 0.3)
  
figure_genotypesex_pro_infantis_Male